COMPARING RAPIDEYE AND LANDSAT 8 OLI ACCURACIES
##########################
#SET WORKING DIRECTORY
##########################
# clean the working environment and graphics
rm(list=ls()); graphics.off()
# specify path to your working directory
setwd("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data")
getwd()
## [1] "C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data"
#Set path for packages installation and load the packages
.libPaths("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3")
library(sp)
library(rgdal)
library(raster)
library(yaImpute)
library(fastICA)
##########################
#RUN FUNCTIONS
##########################
#####
# Function for accuracy assessment
#####
print.accuracy <- function(obs, fit, main) {
print(main)
res <- fit-obs
bias <- signif(mean(res), 3); cat("BIAS:", bias)
bias.r <- signif(bias/mean(obs)*100, 3); cat( " (",bias.r,"%)\n", sep="")
RMSE <- signif(sqrt(sum((res^2)/length(res))), 3); cat("RMSE:",RMSE)
RMSE.r <- signif(RMSE/mean(obs)*100, 3); cat( " (",RMSE.r,"%)\n", sep="")
R2 <- signif(cor(obs,fit,method="pearson")^2, 3); #pseudo R^2
cat("Pseudo R.squared: ", R2,"\n\n", sep="")
ret <- c(bias, bias.r, RMSE, RMSE.r, R2)
}
#####
# Function for leave-one-out cross-validation
#####
cv.plot <- function(data, yvar, xvars, k, yai.method) {
pred <- numeric(nrow(data)) #empty vector for results
for(i in 1:nrow(data)){
ref <- which(row.names(data)!=i)
val <- which(row.names(data)==i)
x <- as.data.frame(data[,xvars])
y <- as.data.frame(data[ref, yvar])
names(y) <- yvar
row.names(y) <- row.names(data)[-val]
knn <- yai(x=x, y=y, k=k, data=data, method=yai.method)
imp <- impute(knn, k=k, method="dstWeighted", ancillaryData=y)
pred[val] <- imp[as.numeric(row.names(imp)) %in% val, yvar]
}
#print results
res <- print.accuracy(data[,yvar], pred, yvar)
xlim <- max(data[,yvar])
plot(data[,yvar], pred, main=yvar, xlab="Observed", ylab="Predicted",
xlim=c(0,xlim), ylim=c(0,xlim))
abline(0,1)
return(res)
}
##########################
#LOAD INPUT DATA
##########################
# load required field data
load("Kou_input_plots.rda") #"Plots" are required because of coordinates
load("PlotData_Kou.rda") #Plot-level results
’
# "re_refl.tiff" is a spatial subset of Rapid Eye satellite image
# acquired 11 October 2013
# load multiband satellite image stack
rapideye<- stack("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data/re_refl.tif")
rapideye#see properties of RasterStack object
## class : RasterStack
## dimensions : 3510, 3720, 13057200, 5 (nrow, ncol, ncell, nlayers)
## resolution : 5, 5 (x, y)
## extent : 605385, 623985, 1289655, 1307205 (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
## names : re_refl.1, re_refl.2, re_refl.3, re_refl.4, re_refl.5
## min values : 11, 362, 411, 827, 922
## max values : 6928, 7321, 7326, 7059, 7456
# aggregare ERapid EYe
rapideye.30m<-aggregate(rapideye, fact=6, fun=mean)
rapideye.30m
## class : RasterBrick
## dimensions : 585, 620, 362700, 5 (nrow, ncol, ncell, nlayers)
## resolution : 30, 30 (x, y)
## extent : 605385, 623985, 1289655, 1307205 (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
## data source : C:\Users\oyelo\AppData\Local\Temp\Rtmpmg7tL3\raster\r_tmp_2017-04-11_065243_11012_17175.grd
## names : re_refl.1, re_refl.2, re_refl.3, re_refl.4, re_refl.5
## min values : 182.2500, 522.5000, 552.5278, 1039.5556, 1092.5278
## max values : 2946.083, 3598.556, 4071.917, 4346.778, 5084.139
#remove unnecessary files
rm(rapideye)
# the image has five spectral bands corresponding to blue, green, red, red edge
# near infrared (NIR) parts of the electromagnetic
# spectrum
# rename bands
bands_re <- c("b.re","g.re","r.re","rededge","nir.re")
names(rapideye.30m) <- bands_re
# visualize the image
plotRGB(rapideye.30m, r="r.re", g="g.re", b="b.re", stretch="lin")
# the image covers 12 km x 12 km area around sentinel site mid-point
# this is so called "true-color" composite as it best corresponds to colors seen by eye
# several "false-color" composites exist for visualization
# here, green vegetation is seen in red (strong reflectance in near infrared)
plotRGB(rapideye.30m, r="nir.re", g="r.re", b="g.re", stretch="lin")
# "Landsat.tiff" is a spatial subset of Landsat 8 OLI satellite image
# acquired 11 October 2013
# load multiband satellite image stack
landsat<- stack("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data/ls8_sr_2014_02_16.tif")
landsat#see properties of RasterStack object
## class : RasterStack
## dimensions : 585, 620, 362700, 6 (nrow, ncol, ncell, nlayers)
## resolution : 30, 30 (x, y)
## extent : 605385, 623985, 1289655, 1307205 (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
## names : ls8_sr_2014_02_16.1, ls8_sr_2014_02_16.2, ls8_sr_2014_02_16.3, ls8_sr_2014_02_16.4, ls8_sr_2014_02_16.5, ls8_sr_2014_02_16.6
## min values : 484, 726, 876, 1614, 2005, 1420
## max values : 1578, 2298, 3041, 3977, 7361, 13529
# the image has six spectral bands corresponding to blue, green, red,
# near infrared (NIR) and short waveinfrared (SWIR) parts of the electromagnetic
# spectrum
# rename bands
bands_l8 <- c("b","g","r","nir","swir1","swir2")
names(landsat) <- bands_l8
# visualize the image
plotRGB(landsat, r="r", g="g", b="b", stretch="lin")
# the image covers 12 km x 12 km area around sentinel site mid-point
# this is so called "true-color" composite as it best corresponds to colors seen by eye
# several "false-color" composites exist for visualization
# here, green vegetation is seen in red (strong reflectance in near infrared)
plotRGB(landsat, r="nir", g="r", b="g", stretch="lin")
# merge Plot.data with plots (coordinates only)
data <- merge(Plot.data, plots[c("plot.id","lat","lon")], by="plot.id", all.x=TRUE)
# reproject coordinates from Lat/Lon to UTM
# add columns for "utm_x" and "utm_y"
data[c("utm_x","utm_y")] <- as.data.frame(project(cbind(data$lon, data$lat),
"+proj=utm +zone=30 ellps=WGS84"))
# plot image with plots
plotRGB(rapideye.30m, r=4, g=3, b=2, stretch="lin")
points(data$utm_x, data$utm_y, pch=19, col="yellow")
# merge Plot.data with plots (coordinates only)
data <- merge(Plot.data, plots[c("plot.id","lat","lon")], by="plot.id", all.x=TRUE)
# reproject coordinates from Lat/Lon to UTM
# add columns for "utm_x" and "utm_y"
data[c("utm_x","utm_y")] <- as.data.frame(project(cbind(data$lon, data$lat),
"+proj=utm +zone=30 ellps=WGS84"))
# plot image with plots
plotRGB(landsat, r=4, g=3, b=2, stretch="lin")
points(data$utm_x, data$utm_y, pch=19, col="yellow")
# for further analysis, we need to extract pixel values for the plots
data[bands_re] <- as.data.frame(extract(rapideye.30m, cbind(data$utm_x, data$utm_y),
method='bilinear'))
# see how data looks now
head(data)
## plot.id cluster plot count.ha ba.ha agb.ha bgb.ha agc.ha
## 1 101 1 1 120 2.774560 8.565222 2.355436 4.025654
## 2 102 1 2 20 1.517970 7.105600 1.954040 3.339632
## 3 103 1 3 240 2.021866 5.727592 1.575088 2.691968
## 4 104 1 4 20 2.202256 11.602759 3.190759 5.453297
## 5 105 1 5 30 2.387948 10.386064 2.856168 4.881450
## 6 106 1 6 40 3.362431 15.486342 4.258744 7.278581
## bgc.ha v.ha dbh.mean dbh.qdr.mean h.mean h.loreys
## 1 1.1070550 7.449620 16.683333 18.56205 6.231667 6.392793
## 2 0.9183987 6.245248 30.900000 31.63927 9.350000 9.795736
## 3 0.7402913 4.979081 8.583333 16.99223 3.882083 5.863372
## 4 1.4996566 10.312879 37.400000 37.57286 10.900000 11.149689
## 5 1.3423988 9.120726 31.500000 32.84413 8.800000 9.094006
## 6 2.0016097 13.900897 32.200000 34.35429 9.162500 9.843287
## count.ha.sdt ba.ha.sdt agb.ha.sdt bgb.ha.sdt agc.ha.sdt bgc.ha.sdt
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## v.ha.sdt count.ha.ddw agb.ha.ddw v.ha.ddw agc.ha.ddw c.can lat
## 1 0 0 0.0000000 0.000000 0.00000000 18.50408 11.69887
## 2 0 0 0.0000000 0.000000 0.00000000 12.29554 11.70358
## 3 0 100 0.1403026 2.775008 0.06594222 20.16882 11.69863
## 4 0 0 0.0000000 0.000000 0.00000000 10.68749 11.70473
## 5 0 0 0.0000000 0.000000 0.00000000 13.00654 11.70902
## 6 0 0 0.0000000 0.000000 0.00000000 22.17338 11.70736
## lon utm_x utm_y b.re g.re r.re rededge nir.re
## 1 -1.982213 610922.2 1293456 819.4953 1366.672 2012.315 2481.336 3365.917
## 2 -1.982173 610924.7 1293977 1229.4334 1855.866 2637.134 3025.833 3762.842
## 3 -1.978797 611294.6 1293430 851.0370 1334.529 1921.262 2374.430 3161.094
## 4 -1.975357 611667.1 1294106 1104.8116 1758.811 2561.255 2980.392 3798.727
## 5 -1.977112 611474.1 1294580 839.8548 1343.992 1987.241 2478.641 3291.341
## 6 -1.976407 611551.6 1294397 870.4163 1422.145 2121.354 2550.309 3316.303
# remove unnecessary data
rm(Plot.data, plots)
# for further analysis, we need to extract pixel values for the plots
data[bands_l8] <- as.data.frame(extract(landsat, cbind(data$utm_x, data$utm_y),
method='bilinear'))
# see how data looks now
head(data)
## plot.id cluster plot count.ha ba.ha agb.ha bgb.ha agc.ha
## 1 101 1 1 120 2.774560 8.565222 2.355436 4.025654
## 2 102 1 2 20 1.517970 7.105600 1.954040 3.339632
## 3 103 1 3 240 2.021866 5.727592 1.575088 2.691968
## 4 104 1 4 20 2.202256 11.602759 3.190759 5.453297
## 5 105 1 5 30 2.387948 10.386064 2.856168 4.881450
## 6 106 1 6 40 3.362431 15.486342 4.258744 7.278581
## bgc.ha v.ha dbh.mean dbh.qdr.mean h.mean h.loreys
## 1 1.1070550 7.449620 16.683333 18.56205 6.231667 6.392793
## 2 0.9183987 6.245248 30.900000 31.63927 9.350000 9.795736
## 3 0.7402913 4.979081 8.583333 16.99223 3.882083 5.863372
## 4 1.4996566 10.312879 37.400000 37.57286 10.900000 11.149689
## 5 1.3423988 9.120726 31.500000 32.84413 8.800000 9.094006
## 6 2.0016097 13.900897 32.200000 34.35429 9.162500 9.843287
## count.ha.sdt ba.ha.sdt agb.ha.sdt bgb.ha.sdt agc.ha.sdt bgc.ha.sdt
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## v.ha.sdt count.ha.ddw agb.ha.ddw v.ha.ddw agc.ha.ddw c.can lat
## 1 0 0 0.0000000 0.000000 0.00000000 18.50408 11.69887
## 2 0 0 0.0000000 0.000000 0.00000000 12.29554 11.70358
## 3 0 100 0.1403026 2.775008 0.06594222 20.16882 11.69863
## 4 0 0 0.0000000 0.000000 0.00000000 10.68749 11.70473
## 5 0 0 0.0000000 0.000000 0.00000000 13.00654 11.70902
## 6 0 0 0.0000000 0.000000 0.00000000 22.17338 11.70736
## lon utm_x utm_y b.re g.re r.re rededge nir.re
## 1 -1.982213 610922.2 1293456 819.4953 1366.672 2012.315 2481.336 3365.917
## 2 -1.982173 610924.7 1293977 1229.4334 1855.866 2637.134 3025.833 3762.842
## 3 -1.978797 611294.6 1293430 851.0370 1334.529 1921.262 2374.430 3161.094
## 4 -1.975357 611667.1 1294106 1104.8116 1758.811 2561.255 2980.392 3798.727
## 5 -1.977112 611474.1 1294580 839.8548 1343.992 1987.241 2478.641 3291.341
## 6 -1.976407 611551.6 1294397 870.4163 1422.145 2121.354 2550.309 3316.303
## b g r nir swir1 swir2
## 1 951.6492 1394.592 1817.576 3142.439 3840.167 2950.875
## 2 1204.6968 1756.546 2293.718 3413.576 4432.911 3835.910
## 3 973.9948 1424.100 1842.532 3055.005 3866.137 3044.507
## 4 1154.8354 1697.474 2218.241 3519.053 4218.354 3540.430
## 5 990.9266 1443.459 1904.928 3198.101 3982.312 3107.111
## 6 1083.4366 1569.858 2078.348 3241.671 4099.756 3256.872
# remove unnecessary data
rm(Plot.data, plots)
# test accuracy for single variable
# set "k", method and x-variables
# test several values of k
k <- 3
method <- "msn"
yvar <- "agb.ha"
xvars <- bands_re
graphics.off()
cv.plot(data, yvar, xvars, k, method)
## [1] "agb.ha"
## BIAS: 0.103 (0.55%)
## RMSE: 11.7 (62.4%)
## Pseudo R.squared: 0.375
## [1] 0.103 0.550 11.700 62.400 0.375
# accuracy statistics are printed on console
# plots shows observed vs. predicted values
# test accuracy for several variables
# make vector of y-variables
yvars <- c("ba.ha","agb.ha","h.mean","c.can")
k <- 3
method <- "msn"
xvars <- bands_re
par(mfrow=c(2,2)) #four variables are plotted in four panels
res <- c()
for(i in 1:length(yvars)){
res <- rbind(res, cv.plot(data, yvars[i], xvars, k, method))
}
## [1] "ba.ha"
## BIAS: -0.163 (-3.05%)
## RMSE: 2.79 (52.2%)
## Pseudo R.squared: 0.468
## [1] "agb.ha"
## BIAS: 0.103 (0.55%)
## RMSE: 11.7 (62.4%)
## Pseudo R.squared: 0.375
## [1] "h.mean"
## BIAS: 0.0787 (1.29%)
## RMSE: 2.4 (39.4%)
## Pseudo R.squared: 0.00517
## [1] "c.can"
## BIAS: 0.298 (1.1%)
## RMSE: 13.8 (51.2%)
## Pseudo R.squared: 0.458
# organize results
res <- cbind(yvars, res)
res <- as.data.frame(res)
names(res) <- c("Attribute","Bias","Bias (%)","RMSE","RMSE (%)","R.squared")
res
## Attribute Bias Bias (%) RMSE RMSE (%) R.squared
## 1 ba.ha -0.163 -3.05 2.79 52.2 0.468
## 2 agb.ha 0.103 0.55 11.7 62.4 0.375
## 3 h.mean 0.0787 1.29 2.4 39.4 0.00517
## 4 c.can 0.298 1.1 13.8 51.2 0.458
# test accuracy for single variable
# set "k", method and x-variables
# test several values of k
k <- 3
method <- "msn"
yvar <- "agb.ha"
xvars <- bands_l8
graphics.off()
cv.plot(data, yvar, xvars, k, method)
## [1] "agb.ha"
## BIAS: 0.0404 (0.216%)
## RMSE: 11.1 (59.2%)
## Pseudo R.squared: 0.424
## [1] 0.0404 0.2160 11.1000 59.2000 0.4240
# accuracy statistics are printed on console
# plots shows observed vs. predicted values
# test accuracy for several variables
# make vector of y-variables
yvars <- c("ba.ha","agb.ha","h.mean","c.can")
k <- 3
method <- "msn"
xvars <- bands_l8
par(mfrow=c(2,2)) #four variables are plotted in four panels
res <- c()
for(i in 1:length(yvars)){
res <- rbind(res, cv.plot(data, yvars[i], xvars, k, method))
}
## [1] "ba.ha"
## BIAS: 0.0793 (1.48%)
## RMSE: 2.52 (47.1%)
## Pseudo R.squared: 0.564
## [1] "agb.ha"
## BIAS: 0.0404 (0.216%)
## RMSE: 11.1 (59.2%)
## Pseudo R.squared: 0.424
## [1] "h.mean"
## BIAS: -0.0183 (-0.3%)
## RMSE: 2.17 (35.6%)
## Pseudo R.squared: 0.0732
## [1] "c.can"
## BIAS: 0.286 (1.06%)
## RMSE: 13.4 (49.7%)
## Pseudo R.squared: 0.484
# organize results
res <- cbind(yvars, res)
res <- as.data.frame(res)
names(res) <- c("Attribute","Bias","Bias (%)","RMSE","RMSE (%)","R.squared")
res
## Attribute Bias Bias (%) RMSE RMSE (%) R.squared
## 1 ba.ha 0.0793 1.48 2.52 47.1 0.564
## 2 agb.ha 0.0404 0.216 11.1 59.2 0.424
## 3 h.mean -0.0183 -0.3 2.17 35.6 0.0732
## 4 c.can 0.286 1.06 13.4 49.7 0.484
# create yai-object
# select vegetation attributes
yvars <- c("agb.ha","c.can")
xvars <- bands_re
y <- as.data.frame(data[,yvars])
x <- as.data.frame(data[,xvars])
k = 3
method = "msn"
msn <- yai(x=x, y=y, k=k, data=data, method=method)
# convert RapidEye image to data frame
rapideye.30m.df <- as.data.frame(rapideye.30m)
# add 160 observations to data frame
r160 <- as.data.frame(matrix(nrow=160, ncol=5, 0))
names(r160) <- bands_re
rapideye.30m.df <- rbind(r160, rapideye.30m.df)
# predict values for pixels
pred <- predict(msn, newdata=rapideye.30m.df, k=3, method="dstWeighted", observed=FALSE)
# ignore warning: this is why 160 rows were added above
#RapidEye
# convert data frame back to raster attribute by attribute
# nrow and ncol depend on the image size
# set extent and projection based on the image
# aboveground biomass
agb_ha <- raster(matrix(pred$agb.ha, nrow=585, ncol=620, byrow=TRUE))
extent(agb_ha) <- rapideye.30m
projection(agb_ha) <- projection(rapideye.30m)
# canopy cover
cc <- raster(matrix(pred$c.can, nrow=585, ncol=620, byrow=TRUE))
extent(cc) <- rapideye.30m
projection(cc) <- projection(rapideye.30m)
# plot rasters
par(mfrow=c(1,2))
plot(agb_ha, main="Aboveground biomass (Mg/ha)")
plot(cc, main="Canopy cover (%)")
# compute mean based on the raster
round(cellStats(agb_ha, stat='mean'), 2)
## [1] 15.66
round(cellStats(cc, stat='mean'), 2)
## [1] 23.76
# should be close to the field based estimate
# create yai-object
# select vegetation attributes
yvars <- c("agb.ha","c.can")
xvars <- bands_l8
y <- as.data.frame(data[,yvars])
x <- as.data.frame(data[,xvars])
k = 3
method = "msn"
msn <- yai(x=x, y=y, k=k, data=data, method=method)
# convert Landsat image to data frame
landsat.df <- as.data.frame(landsat)
# add 160 observations to data frame
r160 <- as.data.frame(matrix(nrow=160, ncol=6, 0))
names(r160) <- bands_l8
landsat.df <- rbind(r160, landsat.df)
# predict values for pixels
pred <- predict(msn, newdata=landsat.df, k=3, method="dstWeighted", observed=FALSE)
# ignore warning: this is why 160 rows were added above
#Lansaat
# convert data frame back to raster attribute by attribute
# nrow and ncol depend on the image size
# set extent and projection based on the image
# aboveground biomass
agb_ha <- raster(matrix(pred$agb.ha, nrow=585, ncol=620, byrow=TRUE))
extent(agb_ha) <- landsat
projection(agb_ha) <- projection(landsat)
# canopy cover
cc <- raster(matrix(pred$c.can, nrow=585, ncol=620, byrow=TRUE))
extent(cc) <- landsat
projection(cc) <- projection(landsat)
# plot rasters
par(mfrow=c(1,2))
plot(agb_ha, main="Aboveground biomass (Mg/ha)")
plot(cc, main="Canopy cover (%)")
# compute mean based on the raster
round(cellStats(agb_ha, stat='mean'), 2)
## [1] 16.13
round(cellStats(cc, stat='mean'), 2)
## [1] 23.42
# should be close to the field based estimate